Load packages
library(tidyverse)
library(gridExtra)
library(cowplot)
library(ggridges)
library(ggstance)
library(treeio)
library(ggtree)
library(tidytree)
Load and combine data
species <- read_csv("../data/species_data.csv") %>% select(-species)
data <- read_csv("../data/data.csv") %>%
left_join(species, by = c("species" = "species_formatted")) %>%
rename(label = species_latin)
data2 <- data %>% group_by(species, label) %>% summarise(n = sum(n))
fulltree <- read.nexus("../data/consensusTree_10kTrees_298Primates_V3.nex")
refs <- read_csv("../data/ref_nodes.csv")
# turn tree into tidy dataframe
tree2 <- as_tibble(fulltree)
tree3 <- tree2 %>%
left_join(data2) %>%
mutate(
hasN = ifelse(is.na(n), 0, .5),
hasN2 = ifelse(is.na(n), .1, .5)) %>%
left_join(refs) %>%
# # also merge w datasheet listing node, n for inner nodes
# left_join(Ns, by = "node") %>%
# mutate(n = coalesce(n.x, n.y)) %>%
# select(-n.x, -n.y) %>%
groupClade(c(493, 496, 429, 302, 408)) %>%
mutate(group = fct_recode(group, "2" = "1"))
Joining, by = "label"
Joining, by = "node"
# turn back into tree
tree4 <- as.treedata(tree3)
This makes a rectangular and a circular tree with the node numbers displayed for reference (saved in the graphs folder).
tree3.2 <- as.treedata(tree3)
# display node numbers for reference
ggtree(tree3.2) +
# tip labels
geom_tippoint(aes(size = n), col = "seagreen", alpha = .5) +
geom_tiplab(offset = 1, size = 3) +
# node labels
geom_text(aes(label = node, x = branch), size = 2, col = "blue", vjust = -.5) +
expand_limits(x = 90) +
# display timescale at the bottom
theme_tree2()
ggsave("../graphs/full_tree_nodes.pdf", width = 8, height = 20, scale = 2)
ggtree(tree3.2, layout = "circular") +
geom_tippoint(aes(size = n), col = "seagreen", alpha = .5) +
geom_tiplab2(offset = 2, size = 3) +
geom_text2(aes(label = node), size = 1.5, col = "blue") +
xlim(NA, 100)
ggsave("../graphs/full_tree_nodes_circular.pdf", width = 8, height = 8, scale = 2)
cols <- viridis::viridis(4, end = .9)
# base plot
p <- ggtree(tree4, aes(size = hasN, alpha = hasN2), layout = "circular") +
# root
geom_rootpoint(size = 1) +
# tips
geom_tippoint(aes(size = n), alpha = .5) +
geom_tiplab2(aes(alpha = hasN), offset = 2, size = 3) +
# tweak scales
scale_alpha_continuous(range = c(.3, 1)) +
scale_size_continuous(range = c(.5, 8)) +
# widen plotting area
xlim(NA, 100)
# highlight clades with background colors
p2 <- p +
geom_hilight(node = 493, fill = cols[1], alpha = .3) +
geom_hilight(node = 496, fill = cols[1], alpha = .3) +
geom_hilight(node = 429, fill = cols[2], alpha = .3) +
geom_hilight(node = 303, fill = cols[3], alpha = .3) +
geom_hilight(node = 408, fill = cols[4], alpha = .3) +
# plot tree again to be on top of the highlights
geom_tree() +
geom_rootpoint(size = 1)
# highlight clades with branch colors
p3 <- p +
aes(col = group) +
scale_color_manual(values = c("gray30", cols))
plots <- mget(c("p", "p2", "p3"))
grid.arrange(p, p2, p3, nrow = 1)
# png with 3x1
ggsave("../graphs/phylo_full.png", arrangeGrob(grobs = plots, nrow = 1),
width = 24, height = 8, scale = 2, dpi = 72)
Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).
# pdf with 1 per page
ggsave("../graphs/phylo_full.pdf", marrangeGrob(grobs = plots, nrow = 1, ncol = 1),
width = 8, height = 8, scale = 2, dpi = 72)
Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).Removed 287 rows containing missing values (geom_point_g_gtree).
# subset tree to just those species who have sample sizes reported, i.e. those who were tested
to_drop <- tree3 %>% filter(is.na(n)) %>% pull(label)
tree5 <- drop.tip(tree4, to_drop)
h <- tibble(reference = c(NA, 1:Ntip(tree5))), .panel = c("Tree", rep("ٴSample sizes", Ntip(tree5))))
Error: unexpected ',' in "h <- tibble(reference = c(NA, 1:Ntip(tree5))),"
# right-side viz depends on the number of sites per species:
# 1 site = vertical crossbar only
# 2+ sites = points + crossbar at median
# X+ sites = densities (currently, X = 4 just to illustrate)
# dirty hack: there's a small character in front of "Sample sizes" to have that panel sort to the right (alphabetically) until I figure out why it doesn't just go by order. This cropped up as an issue when I added the dummy point for the x-axis expansion...
# LEFT FACET
q <- ggtree(tree5, aes(col = group)) +
# this is a dummy point to expand the x scale
# the typical ways weirdly also expand it for the sample size panel once that's added
geom_point(data = tibble(x = 135, y = 1, .panel = "Tree", group = NA)) +
# tip labels
geom_tippoint(aes(size = n), alpha = .5) +
geom_tiplab(offset = 4, size = 3) +
# tweak scales
scale_color_manual(values = c("grey30", cols)) +
scale_fill_manual(values = cols[4]) + # when all categories are taken: cols
# display timescale at the bottom
theme_tree2() +
# add reference lines (these will show up on right panel of facet_plot only)
geom_hline(data = h, aes(yintercept = reference), lwd = .2, col = "grey", alpha = .5) +
geom_vline(data = v, aes(xintercept = reference), lwd = 1.5, col = "grey", alpha = .3) +
# remove facet strips
# or element_text(size = 14)
theme(strip.text = element_blank(), strip.background = element_blank(),
plot.margin = unit(rep(.5, 4), "cm"))
# ADD RIGHT FACET
q %>%
facet_plot("ٴSample sizes",
d3a, geom_density_ridges, aes(x = num, group = label, fill = group),
alpha = .5, lwd = .3, position = position_nudge(y = .1)) %>%
facet_plot("ٴSample sizes",
d4, geom_crossbarh, aes(x = Mdn, xmin = Mdn, xmax = Mdn, group = label,
col = group), alpha = .5, width = .3) %>%
facet_plot("ٴSample sizes",
d3b, geom_point, aes(x = num2, group = label), alpha = .5) #+
# # for rugplot (vertical line instead of point):
# facet_plot("ٴSample sizes",
# d3b, geom_point, aes(x = num2, group = label), shape = "|", size = 3) +
ggsave("../graphs/phylo_ridge.png", width = 4, height = 3, scale = 2)
Relevant functions to tinker with, if necessary:
facet_plot
function (p, panel, data, geom, mapping = NULL, ...)
{
p <- add_panel(p, panel)
df <- p %+>% data
p + geom(data = df, mapping = mapping, ...)
}
<bytecode: 0x7fef95ad0a38>
<environment: namespace:ggtree>
ggtree:::add_panel
function (p, panel)
{
df <- p$data
if (is.null(df[[".panel"]])) {
df[[".panel"]] <- factor("Tree")
}
levels(df$.panel) %<>% c(., panel)
p$data <- df
p + facet_grid(. ~ .panel, scales = "free_x")
}
<bytecode: 0x7fef95ad1f38>
<environment: namespace:ggtree>
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS Mojave 10.14.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] tidytree_0.2.8 ggtree_1.16.6 treeio_1.8.2 ggstance_0.3.3 ggridges_0.5.1
[6] cowplot_1.0.0 gridExtra_2.3 forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
[11] purrr_0.3.2 readr_1.3.1 tidyr_1.0.0 tibble_2.1.3 ggplot2_3.2.1
[16] tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] tidyselect_0.2.5 xfun_0.10 reshape2_1.4.3 haven_2.1.1 lattice_0.20-38
[6] colorspace_1.4-1 vctrs_0.2.0 generics_0.0.2 viridisLite_0.3.0 rlang_0.4.0
[11] pillar_1.4.2 glue_1.3.1 withr_2.1.2 modelr_0.1.5 readxl_1.3.1
[16] rvcheck_0.1.5 lifecycle_0.1.0 plyr_1.8.4 munsell_0.5.0 gtable_0.3.0
[21] cellranger_1.1.0 rvest_0.3.4 labeling_0.3 knitr_1.25 parallel_3.6.1
[26] broom_0.5.2 Rcpp_1.0.2 BiocManager_1.30.7 scales_1.0.0 backports_1.1.5
[31] jsonlite_1.6 hms_0.5.1 stringi_1.4.3 grid_3.6.1 cli_1.1.0
[36] tools_3.6.1 magrittr_1.5 lazyeval_0.2.2 crayon_1.3.4 ape_5.3
[41] pkgconfig_2.0.3 zeallot_0.1.0 xml2_1.2.2 lubridate_1.7.4 viridis_0.5.1
[46] assertthat_0.2.1 httr_1.4.1 rstudioapi_0.10 R6_2.4.0 nlme_3.1-141
[51] compiler_3.6.1